# ------------------------------------------------------------------------------#
# Description: Create Table 1: Summary Statistics
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
# Remove every object visible in the current environment so the script starts
# from an empty workspace.
# NOTE(review): this does not detach packages or reset options() changed
# earlier in the session, so it is not a full session reset. If the script is
# source()d into the global environment from another script, it also deletes
# that caller's objects.
rm(list = ls())
# Run garbage collection so memory held by the removed objects can be released.
gc()

# Load required packages -------------------------------------------------------
library(pacman)
p_load(data.table, modelsummary, dplyr, stringr)

# Load and Process Adoption Data -----------------------------------------------

load("data/adoptions/adoptions.RData")

# For each adoption indicator adopt.<type>, store the within-pin maximum as
# adopted.<type> on every row of that pin.
# NOTE(review): max(..., na.rm = TRUE) yields -Inf (with a warning) for a pin
# whose indicator is entirely NA -- confirm no such pins exist.
adopt.types  <- c("any", "rg", "any.rg", "cs", "both")
adopt.source <- paste0("adopt.", adopt.types)
adopt.target <- paste0("adopted.", adopt.types)

adopt[, (adopt.target) := lapply(.SD, max, na.rm = TRUE),
      by = pin, .SDcols = adopt.source]

rm(adopt.types, adopt.source, adopt.target)

# Merge Peer Eligibility - CS --------------------------------------------------

load("data/bucket/peer_elig_buckets_cs.RData")

# Collapse the cistern peer-eligibility data to one row per pin holding the
# within-pin maximum of each peer count (no na.rm: any NA within a pin
# propagates to that pin's maximum).
elig.cs.by.pin <- peer.buckets.cs[, .(
  max.elig.cs.100 = max(elig.peers.cs.100),
  max.elig.cs.20  = max(elig.peers.cs.20)
), by = pin]

# Adoption outcomes and eligibility flags for rows with parcel.elig > 0;
# unique() removes exact duplicate rows.
adopt.by.pin <- unique(
  adopt[parcel.elig > 0,
        .(pin, adopted.any, adopted.rg, adopted.cs, adopted.both,
          parcel.elig, elig.cs, elig.rg)]
)

# Inner join: only pins present in both tables are kept.
dat <- merge(adopt.by.pin, elig.cs.by.pin, by = "pin")

rm(adopt, peer.buckets.cs, adopt.by.pin, elig.cs.by.pin)

# Merge Peer Eligibility - RG --------------------------------------------------

load("data/bucket/peer_elig_buckets_rg.RData")

# Collapse the rain-garden peer-eligibility data to one row per pin holding
# the within-pin maximum of each peer count (no na.rm: any NA within a pin
# propagates to that pin's maximum).
elig.rg.by.pin <- peer.buckets.rg[, .(
  max.elig.rg.100 = max(elig.peers.rg.100),
  max.elig.rg.20  = max(elig.peers.rg.20)
), by = pin]

# Inner join: pins without a row in the rain-garden table are dropped.
dat <- merge(dat, elig.rg.by.pin, by = "pin")

rm(peer.buckets.rg, elig.rg.by.pin)

# Create Combined Peer Eligibility ---------------------------------------------

# Simple average of the cistern and rain-garden maxima, for each of the two
# peer-count columns (.100 and .20).
dat[, `:=`(
  max.elig.avg.100 = (max.elig.cs.100 + max.elig.rg.100) / 2,
  max.elig.avg.20  = (max.elig.cs.20 + max.elig.rg.20) / 2
)]

# Merge Peer Adoption Data -----------------------------------------------------

# Year of the peer-adoption rows merged into dat. Defined once so all four
# adoption types are guaranteed to use the same year.
peer.year <- 2020

# Each adoption type has its own file, data/bucket/peer_adopt_buckets_<type>.RData,
# which provides an object peer.adopt.bucket.<type> with columns
# peer.adopt.<type>.100 and peer.adopt.<type>.20. The types are processed in
# the order below, which fixes the column order in dat.
for (peer.type in c("any", "rg", "cs", "both")) {
  load(sprintf("data/bucket/peer_adopt_buckets_%s.RData", peer.type))

  bucket.name <- paste0("peer.adopt.bucket.", peer.type)
  bucket.cols <- c("pin", paste0("peer.adopt.", peer.type, c(".100", ".20")))

  # Inner join on pin: parcels without a row for peer.year are dropped.
  dat <- merge(
    dat,
    get(bucket.name)[year == peer.year, bucket.cols, with = FALSE],
    by = "pin"
  )

  # Remove the loaded table before the next file is read.
  rm(list = bucket.name)
}

rm(peer.type, bucket.name, bucket.cols)

# Merge Parcel Characteristics -------------------------------------------------

load("data/parcel_build/parcel_build.RData")

# Columns taken from parcel.build (pin is the join key).
parcel.cols <- c("pin", "zip5", "sub.area", "sqft", "lot", "beds", "baths",
                 "yearbuilt", "val.land", "val.improve", "ren.10")

# Inner join: pins missing from parcel.build are dropped.
dat <- merge(dat, parcel.build[, parcel.cols, with = FALSE], by = "pin")

# Assessed value = land value + improvement value (NA if either part is NA).
dat[, assess.value := val.land + val.improve]

rm(parcel.build, parcel.cols)

# Create Summary Statistics Tables ---------------------------------------------

# Use modelsummary's "plain" numeric formatting for LaTeX output
options(modelsummary_format_numeric_latex = "plain")

# Eligibility Summary Tables (Cistern and Rain Garden) -------------------------
#
# Two tables with identical layout are produced, one per eligibility sample:
#   output/tables/table_1a.tex : parcels with elig.cs == 1
#   output/tables/table_1b.tex : parcels with elig.rg == 1
# The variable list, labels, and section headers are defined once so the two
# tables cannot drift apart.

# Source column -> display label, in the row order of the table. Renaming
# through this explicit mapping (rather than by position) means a label can
# never end up on the wrong variable if the selection is reordered.
table.labels <- c(
  adopted.any        = "Any",
  adopted.rg         = "Rain Garden",
  adopted.cs         = "Cistern",
  adopted.both       = "Both",
  peer.adopt.any.100 = "100 Peers",
  peer.adopt.any.20  = "20 Peers",
  max.elig.cs.100    = "100 Eligible (CS)",
  max.elig.rg.100    = "100 Eligible (RG)",
  max.elig.cs.20     = "20 Eligible (CS)",
  max.elig.rg.20     = "20 Eligible (RG)"
)

# Bold section labels inserted into the exported table. One column per table
# column (label, Mean, SD, N). Positions are row indices in the finished
# table: 1 precedes the 4 adoption rows, 6 the 2 peer-adoption rows, and 9 the
# 4 peer-eligibility rows.
new_rows <- data.frame(
  a = c("\\bf Adoption", "\\bf Peer Adoption", "\\bf Peer Eligibility"),
  b = "", c = "", d = ""
)
attr(new_rows, "position") <- c(1, 6, 9)

# Eligibility flag that defines each sample and the file its table goes to.
table.specs <- list(
  list(elig.col = "elig.cs", file = "output/tables/table_1a.tex"),
  list(elig.col = "elig.rg", file = "output/tables/table_1b.tex")
)

for (spec in table.specs) {
  # Rows where the eligibility flag equals 1; which() drops NA flags, matching
  # data.table's treatment of NA in a logical row filter.
  treat <- dat[which(dat[[spec$elig.col]] == 1), names(table.labels),
               with = FALSE]
  setnames(treat, old = names(table.labels), new = unname(table.labels))

  # Preview table
  tab <- datasummary(
    All(treat) ~ Mean + SD + N,
    fmt = 3,
    data = treat,
    output = "data.frame"
  )
  print(tab)

  # Add section labels and export to LaTeX
  datasummary(
    All(treat) ~ Mean + SD + N,
    fmt = 3,
    data = treat,
    sparse_header = TRUE,
    output = spec$file,
    add_rows = new_rows
  )
}